import os
import sys

# The dataset to process is given as the first command-line argument.
if len(sys.argv) < 2:
    # Exit with a usage message instead of an IndexError traceback when the
    # argument is missing.
    sys.exit("Usage: %s <MiSeq|HiSeq|CAGE|Fantom4|StartSeq>" % sys.argv[0])
dataset = sys.argv[1]

# Only these dataset names are accepted; anything else is rejected up front.
if dataset not in ('MiSeq', 'HiSeq', 'CAGE', 'Fantom4', 'StartSeq'):
    raise Exception("Unknown dataset '%s'" % dataset)

def read_libraries(dataset, directory="/osc-fs_home/mdehoon/Data/CASPARs/"):
    """Return the sorted library names that have a BAM file for *dataset*.

    A library name is the name of a file found in
    ``<directory>/<dataset>/Mapping`` with its ``.bam`` extension removed.
    The known negative-control libraries are left out of the result.

    Args:
        dataset: Name of the dataset subdirectory to scan.
        directory: Root directory containing one subdirectory per dataset.
            Defaults to the original hard-coded data location.

    Returns:
        Alphabetically sorted list of library names.

    Raises:
        ValueError: If an entry in the Mapping directory does not have the
            ``.bam`` extension.
        OSError: If the Mapping directory cannot be listed.
    """
    # (dataset, library) pairs that are negative controls and must be skipped.
    negative_controls = {
        ("MiSeq", "neg_r1"),  # protocol negative control
        ("HiSeq", "t01_r3"),  # sample negative control
    }
    subdirectory = os.path.join(directory, dataset, 'Mapping')
    libraries = []
    for filename in os.listdir(subdirectory):
        library, extension = os.path.splitext(filename)
        if extension != ".bam":
            # Explicit raise rather than assert, so the check is not stripped
            # when Python runs with -O.
            raise ValueError(
                "Unexpected file '%s' in '%s': expected a .bam file"
                % (filename, subdirectory)
            )
        if (dataset, library) in negative_controls:
            continue
        libraries.append(library)
    return sorted(libraries)

# Discover the libraries to process for the dataset chosen on the command line.
libraries = read_libraries(dataset)

# One batch script is written per library, and script.sh collects the matching
# `sbatch` submission commands so all jobs can be queued in one go.
# The `with` blocks make sure both files are closed even if a write fails.
with open('script.sh', 'w') as output:
    for library in libraries:
        script_name = "script_%s_%s.sh" % (dataset, library)
        with open(script_name, 'w') as handle:
            # Each job redirects stderr/stdout to per-library files and runs
            # prepare_enhancer_prediction.py for this dataset/library pair.
            handle.write("""\
#!/bin/bash
#SBATCH -e ./script_%s_%s.stderr
#SBATCH -o ./script_%s_%s.stdout
#SBATCH --mem=4G
python prepare_enhancer_prediction.py %s %s
""" % (dataset, library, dataset, library, dataset, library))
        output.write('sbatch %s\n' % script_name)
# The generated scripts use #SBATCH directives and are submitted with sbatch,
# so the scheduler named in the message is Slurm (not Grid Engine).
print("Scripts generated; run script.sh to schedule them on Slurm")
